# Setup ----
# Load every package used by this analysis through pacman.
pacman::p_load(
  dplyr, ggplot2, tm, SnowballC, wordcloud2, RColorBrewer,
  plotly, stringr, d3heatmap, htmlwidgets
)

# Restore the saved objects from disk. The rest of the script reads a data
# frame named `X`, so this file is expected to provide it.
load("data/X.rdata")

# Per-column overview of X. The console output recorded when the notebook was
# rendered is kept below as `#>` comments (column alignment was lost in the
# capture).
summary(X)
#> url sub date
#> Length:10760 Business & Finance :2249 Min. :2010-02-17
#> Class :character R&D :1857 1st Qu.:2013-06-03
#> Mode :character Grid Connection :1319 Median :2015-03-23
#> Authorities :1131 Mean :2015-03-05
#> Technology :1077 3rd Qu.:2017-03-02
#> Operations & Maintenance: 947 Max. :2019-04-12
#> (Other) :2180
#> title abstract author tags
#> Length:10760 Length:10760 Length:10760 Length:10760
#> Class :character Class :character Class :character Class :character
#> Mode :character Mode :character Mode :character Mode :character
#> text rov
#> Length:10760 Min. : 0.000
#> Class :character 1st Qu.: 0.000
#> Mode :character Median : 0.000
#> Mean : 0.033
#> 3rd Qu.: 0.000
#> Max. :13.000
# Article counts ----

# Histogram of articles per calendar year of `X$date`.
par(cex = 0.8, mar = c(6, 4, 4, 2))
hist(
  X$date,
  breaks = "year",
  freq = TRUE,
  main = "No. Articles per Year",
  las = 2,
  xlab = ""
)

# Horizontal bar chart of articles per subject, ordered by count.
# The left margin is widened (12 lines) for the subject labels.
par(cex = 0.8, mar = c(4, 12, 4, 2))
table(X$sub) %>%
  sort() %>%
  barplot(
    las = 2,
    horiz = TRUE,
    main = "No. Articles per Subject",
    xlab = "freq"
  )

# Share of each subject within every year: count articles per (year, sub) and
# draw the bars with position = "fill" so each year is normalised to 1.
p <- X %>%
  mutate(year = as.integer(format(date, "%Y"))) %>%
  group_by(year, sub) %>%
  count() %>%
  ggplot(aes(x = year, y = n, fill = sub)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_x_continuous(breaks = 2009:2019)

# Render the ggplot as an interactive plotly widget.
ggplotly(p)

# Figures captured when the notebook was rendered:
#> 1.png
#> 2.png
#> 3.png
#> 4.png
#> 5.png
# ROV mentions ----

# Number of times " ROV" or " ROUV" occurs in each article's text. The leading
# space in the pattern means only occurrences preceded by a space are counted.
X$rov <- str_count(X$text, " ROV| ROUV")

# Total mentions cross-tabulated by subject (rows) and publication year
# (columns).
rx <- xtabs(rov ~ sub + format(date, "%Y"), X)

# Print the table. The console output recorded when the notebook was rendered
# is kept below as `#>` comments (column alignment was lost in the capture).
rx
#> format(date, "%Y")
#> sub 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
#> Authorities 0 0 0 0 0 0 1 1 0 0
#> Business & Finance 0 0 0 5 32 7 21 2 10 0
#> Contracts & Tenders 0 0 0 0 0 0 0 5 3 1
#> Environment 0 0 0 0 0 0 0 0 1 0
#> Grid Connection 0 9 13 12 1 1 5 8 2 0
#> Industry Contribution 0 0 0 0 0 0 0 0 0 0
#> Jobs & Recruitment 0 0 0 0 0 0 0 0 0 0
#> Operations & Maintenance 0 1 7 1 10 9 3 6 11 2
#> Ports & Logistics 0 0 1 0 0 0 0 1 0 0
#> R&D 0 0 3 8 16 7 15 3 0 0
#> Technology 3 11 2 8 13 5 6 8 8 0
#> Training & Education 0 0 2 0 14 1 0 0 0 0
#> Vessels 0 0 0 0 1 11 7 6 5 0
#> Wind Farm Update 0 0 0 0 0 0 0 0 0 0

# Interactive heatmap of the table. Arguments are named explicitly rather than
# passed as positional T/F with a partially matched `col`:
#   Rowv = TRUE   -> reorder/cluster the subject rows
#   Colv = FALSE  -> keep the year columns in their original order
# NOTE(review): this mapping assumes the CRAN d3heatmap signature
# d3heatmap(x, Rowv, Colv, ..., colors, ...) -- confirm against the installed
# version.
rx %>%
  as.data.frame.matrix() %>%
  d3heatmap(Rowv = TRUE, Colv = FALSE, colors = "Greens")

# Figures captured when the notebook was rendered:
#> 6.png
#> 7.png
#> 8.png
#> 9.png
#> 10.png